ggplot(dat_model, aes(period, fill = S_Haushaltsgroesse)) + geom_bar(position = "fill")
knitr::kable(table(dat_model$S_Haushaltsgroesse,
dat_model$JS_HUR_Reisebegleitung_HH))
| 1 | 2 | 3 | 4 | 5 | 5.3 | |
|---|---|---|---|---|---|---|
| 1 | 42406 | 0 | 0 | 0 | 0 | 0 |
| 2 | 7082 | 63473 | 0 | 0 | 0 | 0 |
| 3 | 3974 | 9098 | 16918 | 0 | 0 | 0 |
| 4 | 2464 | 4209 | 2487 | 15451 | 0 | 0 |
| 5.3 | 1014 | 1190 | 528 | 1002 | 3186 | 739 |
dat_model %>%
group_by(S_Haushaltsgroesse_14plus, S_Haushaltsgroesse_Kinder) %>%
summarize(N = length(S_Haushaltsgroesse_equi),
min_HHsize_equi = min(S_Haushaltsgroesse_equi, na.rm = T),
mean_HHsize_equi = mean(S_Haushaltsgroesse_equi, na.rm = T),
median_HHsize_equi = median(S_Haushaltsgroesse_equi, na.rm = T),
max_HHsize_equi = max(S_Haushaltsgroesse_equi, na.rm = T)) %>%
knitr::kable()
## `summarise()` has grouped output by 'S_Haushaltsgroesse_14plus'. You can override using the `.groups` argument.
| S_Haushaltsgroesse_14plus | S_Haushaltsgroesse_Kinder | N | min_HHsize_equi | mean_HHsize_equi | median_HHsize_equi | max_HHsize_equi |
|---|---|---|---|---|---|---|
| 1.0 | 0.0 | 42596 | 1.00 | 1.00 | 1.00 | 1.00 |
| 1.0 | 1.0 | 1950 | 1.30 | 1.30 | 1.30 | 1.30 |
| 1.0 | 2.0 | 705 | 1.60 | 1.60 | 1.60 | 1.60 |
| 1.0 | 3.0 | 114 | 1.90 | 1.90 | 1.90 | 1.90 |
| 1.0 | 4.3 | 29 | 2.29 | 2.29 | 2.29 | 2.29 |
| 2.0 | 0.0 | 68711 | 1.50 | 1.50 | 1.50 | 1.50 |
| 2.0 | 1.0 | 13421 | 1.80 | 1.80 | 1.80 | 1.80 |
| 2.0 | 2.0 | 11898 | 2.10 | 2.10 | 2.10 | 2.10 |
| 2.0 | 3.3 | 2403 | 2.49 | 2.49 | 2.49 | 2.49 |
| 3.0 | 0.0 | 15924 | 2.00 | 2.00 | 2.00 | 2.00 |
| 3.0 | 1.0 | 4530 | 2.30 | 2.30 | 2.30 | 2.30 |
| 3.0 | 2.3 | 1499 | 2.69 | 2.69 | 2.69 | 2.69 |
| 4.0 | 0.0 | 8103 | 2.50 | 2.50 | 2.50 | 2.50 |
| 4.0 | 1.3 | 1515 | 2.89 | 2.89 | 2.89 | 2.89 |
| 5.3 | 0.0 | 2242 | 3.15 | 3.15 | 3.15 | 3.15 |
dat_model %>%
mutate(period = factor(period)) %>%
ggplot(aes(period, JS_HUR_Ausgaben_gesamt)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
dat_model %>%
mutate(period = factor(period)) %>%
ggplot(aes(period, JS_HUR_Ausgaben_gesamt_equi)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 419 rows containing non-finite values (stat_boxplot).
dat_model %>%
mutate(period = factor(period)) %>%
ggplot(aes(period, S_Einkommen_HH)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
dat_model %>%
mutate(period = factor(period)) %>%
ggplot(aes(period, S_Einkommen_HH_equi)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
Personen-adjustierte Reiseausgaben pro personen-adjustiertem Einkommen
dat_model %>%
mutate(period = factor(period)) %>%
ggplot(aes(period, JS_HUR_Ausgaben_gesamt_equi)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 419 rows containing non-finite values (stat_boxplot).
Verwendete Kovariablen:
- Reisejahr - Alter des Hauptverdieners im HH - Geschlecht - personen-adjustiertes Einkommen im HH - Anzahl Reisebegleitung gesamt (unabhängig ob HH oder nicht-HH) - Bildung - Sind Kleinkinder (<=5 Jahre) im Haushalt ja/nein? - Stadtgröße
dat_model %>%
mutate(period = factor(period)) %>%
ggplot(aes(period, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 419 rows containing non-finite values (stat_boxplot).
dat_model %>%
mutate(period = factor(period)) %>%
ggplot(aes(period, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90)) +
ylim(c(0,3))
## Warning: Removed 3746 rows containing non-finite values (stat_boxplot).
dat_model %>%
mutate(age = factor(age)) %>%
ggplot(aes(age, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 419 rows containing non-finite values (stat_boxplot).
dat_model %>%
mutate(age = factor(age)) %>%
ggplot(aes(age, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90)) +
ylim(c(0,3))
## Warning: Removed 3746 rows containing non-finite values (stat_boxplot).
dat_model %>%
ggplot(aes(S_Geschlecht, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 419 rows containing non-finite values (stat_boxplot).
dat_model %>%
ggplot(aes(S_Geschlecht, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90)) +
ylim(c(0,3))
## Warning: Removed 3746 rows containing non-finite values (stat_boxplot).
dat_model %>%
ggplot(aes(S_Kinder_0_bis_5_binaer, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 419 rows containing non-finite values (stat_boxplot).
dat_model %>%
ggplot(aes(S_Kinder_0_bis_5_binaer, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90)) +
ylim(c(0,3))
## Warning: Removed 3746 rows containing non-finite values (stat_boxplot).
dat_model %>%
ggplot(aes(S_Wohnortgroesse, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 419 rows containing non-finite values (stat_boxplot).
dat_model %>%
ggplot(aes(S_Wohnortgroesse, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90)) +
ylim(c(0,3))
## Warning: Removed 3746 rows containing non-finite values (stat_boxplot).
dat_model %>%
ggplot(aes(S_Bildung_HV, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 419 rows containing non-finite values (stat_boxplot).
dat_model %>%
ggplot(aes(S_Bildung_HV, rel_expenses)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90)) +
ylim(c(0,3))
## Warning: Removed 3746 rows containing non-finite values (stat_boxplot).
ggplot(dat_model, aes(S_Einkommen_HH_equi, rel_expenses)) +
geom_hex()
## Warning: Removed 419 rows containing non-finite values (stat_binhex).
ggplot(dat_model, aes(JS_HUR_Reisebegleitung_Gesamtanzahl, rel_expenses)) +
geom_hex()
## Warning: Removed 419 rows containing non-finite values (stat_binhex).
model_pure <- bam(formula = rel_expenses ~ te(period, age, bs = "ps", k = c(10, 10)),
family = Gamma(link = "log"),
data = dat_model)
summary(model_pure)
##
## Family: Gamma
## Link function: log
##
## Formula:
## rel_expenses ~ te(period, age, bs = "ps", k = c(10, 10))
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.095174 0.002493 -38.17 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(period,age) 63.72 74.25 43.67 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.019 Deviance explained = 4.23%
## fREML = 2.556e+05 Scale est. = 1.0814 n = 175221
model_cov <- bam(formula = rel_expenses ~ te(period, age, bs = "ps", k = c(10, 10)) +
S_Geschlecht + S_Kinder_0_bis_5_binaer + S_Wohnortgroesse +
S_Bildung_HV + S_Haushaltsgroesse +
s(S_Einkommen_HH_equi, bs = "ps", k = 10),
family = Gamma(link = "log"),
data = dat_model)
summary(model_cov)
##
## Family: Gamma
## Link function: log
##
## Formula:
## rel_expenses ~ te(period, age, bs = "ps", k = c(10, 10)) + S_Geschlecht +
## S_Kinder_0_bis_5_binaer + S_Wohnortgroesse + S_Bildung_HV +
## S_Haushaltsgroesse + s(S_Einkommen_HH_equi, bs = "ps", k = 10)
##
## Parametric coefficients:
## Estimate
## (Intercept) -0.161402
## S_Geschlechtweiblich -0.043693
## S_Kinder_0_bis_5_binaerKinder dieser Altersstufe -0.118062
## S_Wohnortgroesse5.000 bis 49.999 0.043348
## S_Wohnortgroesse50.000 bis 99.999 0.094123
## S_Wohnortgroesse100.000 bis 499.999 0.097277
## S_Wohnortgroesse500.000 und mehr 0.160953
## S_Bildung_HVMittlere Reife/weiterführende Schule ohne Abitur 0.051150
## S_Bildung_HVAbitur/(Fach-)Hochschulreife 0.100183
## S_Bildung_HVUniversitaet/techn. Hochschule/Polytechnikum 0.086226
## S_Haushaltsgroesse2 -0.041537
## S_Haushaltsgroesse3 -0.086559
## S_Haushaltsgroesse4 -0.096317
## S_Haushaltsgroesse5.3 -0.137658
## Std. Error t value
## (Intercept) 0.006715 -24.037
## S_Geschlechtweiblich 0.003659 -11.942
## S_Kinder_0_bis_5_binaerKinder dieser Altersstufe 0.007251 -16.281
## S_Wohnortgroesse5.000 bis 49.999 0.005571 7.780
## S_Wohnortgroesse50.000 bis 99.999 0.007666 12.278
## S_Wohnortgroesse100.000 bis 499.999 0.006553 14.845
## S_Wohnortgroesse500.000 und mehr 0.006597 24.397
## S_Bildung_HVMittlere Reife/weiterführende Schule ohne Abitur 0.004392 11.647
## S_Bildung_HVAbitur/(Fach-)Hochschulreife 0.006291 15.924
## S_Bildung_HVUniversitaet/techn. Hochschule/Polytechnikum 0.006198 13.911
## S_Haushaltsgroesse2 0.004804 -8.646
## S_Haushaltsgroesse3 0.006243 -13.866
## S_Haushaltsgroesse4 0.006882 -13.997
## S_Haushaltsgroesse5.3 0.010181 -13.521
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## S_Geschlechtweiblich < 2e-16 ***
## S_Kinder_0_bis_5_binaerKinder dieser Altersstufe < 2e-16 ***
## S_Wohnortgroesse5.000 bis 49.999 7.27e-15 ***
## S_Wohnortgroesse50.000 bis 99.999 < 2e-16 ***
## S_Wohnortgroesse100.000 bis 499.999 < 2e-16 ***
## S_Wohnortgroesse500.000 und mehr < 2e-16 ***
## S_Bildung_HVMittlere Reife/weiterführende Schule ohne Abitur < 2e-16 ***
## S_Bildung_HVAbitur/(Fach-)Hochschulreife < 2e-16 ***
## S_Bildung_HVUniversitaet/techn. Hochschule/Polytechnikum < 2e-16 ***
## S_Haushaltsgroesse2 < 2e-16 ***
## S_Haushaltsgroesse3 < 2e-16 ***
## S_Haushaltsgroesse4 < 2e-16 ***
## S_Haushaltsgroesse5.3 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(period,age) 57.487 67.45 55.8 <2e-16 ***
## s(S_Einkommen_HH_equi) 8.378 8.64 2063.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.227 Deviance explained = 21%
## fREML = 1.9287e+05 Scale est. = 0.55933 n = 170757
model_cov2 <- bam(formula = rel_expenses ~ te(period, age, bs = "ps", k = c(10, 10)) +
S_Geschlecht + S_Kinder_0_bis_5_binaer + S_Wohnortgroesse +
S_Bildung_HV +
s(S_Einkommen_HH_equi, bs = "ps", k = 10) +
JS_HUR_Reisedauer,
family = Gamma(link = "log"),
data = dat_model)
summary(model_cov2)
##
## Family: Gamma
## Link function: log
##
## Formula:
## rel_expenses ~ te(period, age, bs = "ps", k = c(10, 10)) + S_Geschlecht +
## S_Kinder_0_bis_5_binaer + S_Wohnortgroesse + S_Bildung_HV +
## s(S_Einkommen_HH_equi, bs = "ps", k = 10) + JS_HUR_Reisedauer
##
## Parametric coefficients:
## Estimate
## (Intercept) -0.945055
## S_Geschlechtweiblich -0.020226
## S_Kinder_0_bis_5_binaerKinder dieser Altersstufe -0.169620
## S_Wohnortgroesse5.000 bis 49.999 -0.007797
## S_Wohnortgroesse50.000 bis 99.999 -0.001494
## S_Wohnortgroesse100.000 bis 499.999 -0.006705
## S_Wohnortgroesse500.000 und mehr 0.012142
## S_Bildung_HVMittlere Reife/weiterführende Schule ohne Abitur 0.052490
## S_Bildung_HVAbitur/(Fach-)Hochschulreife 0.068192
## S_Bildung_HVUniversitaet/techn. Hochschule/Polytechnikum 0.060609
## JS_HUR_Reisedauer6 bis 8 Tage 0.318403
## JS_HUR_Reisedauer9 bis 12 Tage 0.610221
## JS_HUR_Reisedauer13 bis 15 Tage 0.854207
## JS_HUR_Reisedauer16 bis 19 Tage 0.975750
## JS_HUR_Reisedauer20 bis 22 Tage 1.059951
## JS_HUR_Reisedauer23 bis 26 Tage 1.132588
## JS_HUR_Reisedauer27 bis 29 Tage 1.221721
## JS_HUR_Reisedauer30 Tage und mehr 1.350763
## Std. Error t value
## (Intercept) 0.012031 -78.552
## S_Geschlechtweiblich 0.003339 -6.058
## S_Kinder_0_bis_5_binaerKinder dieser Altersstufe 0.006157 -27.549
## S_Wohnortgroesse5.000 bis 49.999 0.005096 -1.530
## S_Wohnortgroesse50.000 bis 99.999 0.007024 -0.213
## S_Wohnortgroesse100.000 bis 499.999 0.006009 -1.116
## S_Wohnortgroesse500.000 und mehr 0.006087 1.995
## S_Bildung_HVMittlere Reife/weiterführende Schule ohne Abitur 0.004010 13.089
## S_Bildung_HVAbitur/(Fach-)Hochschulreife 0.005746 11.869
## S_Bildung_HVUniversitaet/techn. Hochschule/Polytechnikum 0.005652 10.724
## JS_HUR_Reisedauer6 bis 8 Tage 0.011800 26.983
## JS_HUR_Reisedauer9 bis 12 Tage 0.011876 51.383
## JS_HUR_Reisedauer13 bis 15 Tage 0.011511 74.209
## JS_HUR_Reisedauer16 bis 19 Tage 0.013412 72.750
## JS_HUR_Reisedauer20 bis 22 Tage 0.012107 87.549
## JS_HUR_Reisedauer23 bis 26 Tage 0.017269 65.585
## JS_HUR_Reisedauer27 bis 29 Tage 0.015227 80.232
## JS_HUR_Reisedauer30 Tage und mehr 0.014287 94.547
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## S_Geschlechtweiblich 1.38e-09 ***
## S_Kinder_0_bis_5_binaerKinder dieser Altersstufe < 2e-16 ***
## S_Wohnortgroesse5.000 bis 49.999 0.1260
## S_Wohnortgroesse50.000 bis 99.999 0.8315
## S_Wohnortgroesse100.000 bis 499.999 0.2645
## S_Wohnortgroesse500.000 und mehr 0.0461 *
## S_Bildung_HVMittlere Reife/weiterführende Schule ohne Abitur < 2e-16 ***
## S_Bildung_HVAbitur/(Fach-)Hochschulreife < 2e-16 ***
## S_Bildung_HVUniversitaet/techn. Hochschule/Polytechnikum < 2e-16 ***
## JS_HUR_Reisedauer6 bis 8 Tage < 2e-16 ***
## JS_HUR_Reisedauer9 bis 12 Tage < 2e-16 ***
## JS_HUR_Reisedauer13 bis 15 Tage < 2e-16 ***
## JS_HUR_Reisedauer16 bis 19 Tage < 2e-16 ***
## JS_HUR_Reisedauer20 bis 22 Tage < 2e-16 ***
## JS_HUR_Reisedauer23 bis 26 Tage < 2e-16 ***
## JS_HUR_Reisedauer27 bis 29 Tage < 2e-16 ***
## JS_HUR_Reisedauer30 Tage und mehr < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## te(period,age) 62.55 72.347 19.98 <2e-16 ***
## s(S_Einkommen_HH_equi) 8.42 8.685 3072.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.321 Deviance explained = 35.5%
## fREML = 1.7708e+05 Scale est. = 0.46578 n = 170573
plot_APCheatmap(dat = dat_model, model = model_pure, plot_CI = FALSE)
plot_APCheatmap(dat = dat_model, model = model_cov, plot_CI = FALSE)
model_list <- list("Pure APC model" = model_pure,
"Covariate model" = model_cov)
plot_jointMarginalAPCeffects(model_list, dat_model)
model_list2 <- list("Covariate model including Reisedauer" = model_cov2,
"Covariate model" = model_cov)
plot_jointMarginalAPCeffects(model_list2, dat_model) + ylim(0, 5)
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
plot_partialAPCeffects(model_pure, dat_model, variable = "age")
plot_partialAPCeffects(model_pure, dat_model, variable = "period")
plot_partialAPCeffects(model_pure, dat_model, variable = "cohort")
plot_partialAPCeffects(model_cov, dat_model, variable = "age")
plot_partialAPCeffects(model_cov, dat_model, variable = "period")
plot_partialAPCeffects(model_cov, dat_model, variable = "cohort")
plot_partialAPCeffects(model_cov2, dat_model, variable = "age")
plot_partialAPCeffects(model_cov2, dat_model, variable = "period")
plot_partialAPCeffects(model_cov2, dat_model, variable = "cohort")
plot_1Dsmooth(model_cov, select = 2)
## Warning in plot_1Dsmooth(model_cov, select = 2): Note: After the delta method transformation some values of the
## lower confidence interval border resulted were negative. These
## values were set to 0.01
plot_1Dsmooth(model_cov2, select = 2)
gam.check(model_pure)
##
## Method: fREML Optimizer: perf newton
## full convergence after 7 iterations.
## Gradient range [-2.258822e-05,1.976199e-06]
## (score 255597.2 & scale 1.081368).
## Hessian positive definite, eigenvalue range [2.249184,87608.51].
## Model rank = 100 / 100
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(period,age) 99.0 63.7 0.94 0.36
gam.check(model_cov)
##
## Method: fREML Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-0.002619785,0.002543637]
## (score 192871.6 & scale 0.5593304).
## Hessian positive definite, eigenvalue range [3.622746,85369.51].
## Model rank = 122 / 122
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(period,age) 99.00 57.49 0.93 0.025 *
## s(S_Einkommen_HH_equi) 9.00 8.38 0.95 0.325
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
gam.check(model_cov2)
##
## Method: fREML Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-0.0005190619,0.0004942996]
## (score 177081.4 & scale 0.4657843).
## Hessian positive definite, eigenvalue range [3.468629,85275.51].
## Model rank = 126 / 126
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## te(period,age) 99.00 62.55 0.96 0.22
## s(S_Einkommen_HH_equi) 9.00 8.42 0.94 0.05 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1